library(dplyr)
library(tidyr)
library(readstata13)

## Load the raw Harris study no. 1623 file (1966 National Election Survey).
## The path is relative: set the working directory to the replication
## repository before running this script.
survey <- read.dta13("Harris_Data/Harris 1966 National Election Survey, study no. 1623/harris_s1623_spss.dta")

# respondent id (pid): running row index.
# seq_len() is used instead of 1:nrow() so that an empty data set yields an
# empty index rather than c(1, 0).
survey$pid <- seq_len(nrow(survey))

# study number (study), stored as a character string

survey$study <- as.character(1623)

# study year (year)

survey$year <- 1966

# geographic data (urban): filled with NA for this study
survey[["urban"]] <- NA

# geographic region (region): inspect the raw column, then store it as
# character and tabulate the result
summary(survey$region)
survey[["region"]] <- as.character(survey$region)
table(survey[["region"]])

# head of household (hh): filled with NA for this study
survey[["hh"]] <- NA

# inequality increasing (inequality): taken from P19A_1, which the original
# author notes is already harmonised
summary(survey$P19A_1)

survey[["inequality"]] <- as.character(survey$P19A_1)
table(survey[["inequality"]])

# inequality.variable: constant 1 for every row
survey[["inequality.variable"]] <- 1

# union variables (union.self and union.other)
# F4_1 and F4_2 are noted by the original author as already harmonized;
# each is summarised and then copied over as character.

summary(survey$F4_1)
survey[["union.self"]] <- as.character(survey$F4_1)

summary(survey$F4_2)
survey[["union.other"]] <- as.character(survey$F4_2)

# employed and type (employed), from F2: summarise the raw item, convert to
# character, then check the resulting values and class
summary(survey$F2)
survey[["employed"]] <- as.character(survey$F2)
table(survey[["employed"]])
class(survey[["employed"]])

# employed.self: filled with NA for this study
survey[["employed.self"]] <- NA

# employment 2 (occupation), from F3
survey[["occupation"]] <- as.character(survey$F3)

# occupation.self: filled with NA for this study
survey[["occupation.self"]] <- NA

# household size (hhsize), from F7
survey[["hhsize"]] <- as.character(survey$F7)

# education (educ), from F11: look at the raw distribution and levels
# before converting to character
summary(survey$F11)
levels(survey$F11)
survey[["educ"]] <- as.character(survey$F11)
table(survey[["educ"]])

# household income (income), from F14
table(survey$F14)
survey[["income"]] <- as.character(survey$F14)

# age (age)
# Age is held in two columns, F13_1 and F13_2. Show any rows that have a
# value in both, since only F13_1 is kept for those rows below.
survey[!(is.na(survey$F13_1) | is.na(survey$F13_2)), c("F13_1", "F13_2")]

# Use F13_1 when it is present, otherwise fall back to F13_2; the result is
# stored as character.
survey[["age"]] <- as.character(
  ifelse(!is.na(survey$F13_1),
         as.character(survey$F13_1),
         as.character(survey$F13_2))
)
table(survey[["age"]])


# race (race), from F15
survey[["race"]] <- as.character(survey$F15)
table(survey[["race"]])

# political orientation (party), from Q2A
summary(survey$Q2A)
survey[["party"]] <- as.character(survey$Q2A)
table(survey[["party"]])

# ideology (ideology), from P16M
summary(survey$P16M)
survey[["ideology"]] <- as.character(survey$P16M)
table(survey[["ideology"]])

# gender
## Age was asked separately for men and women, so which of the two age
## columns is filled in identifies the respondent's gender.
table(survey$F13_2)

## Rows where both age columns are missing, or both are filled, get NA.
## Otherwise: only F13_2 present -> "Female", only F13_1 present -> "Male".
survey[["gender"]] <- as.character(
  ifelse(is.na(survey$F13_1) == is.na(survey$F13_2),
         NA,
         ifelse(is.na(survey$F13_1), "Female", "Male"))
)
table(survey[["gender"]])

# religion (religion), from F12
survey[["religion"]] <- as.character(survey$F12)
table(survey[["religion"]])

## factuals: all three columns are filled with NA for this study
survey[c("factual1", "factual2", "factual3")] <- NA

## alienation index: three P19A items copied over as character
survey[["dontcare"]] <- as.character(survey$P19A_2)
survey[["dontcount"]] <- as.character(survey$P19A_4)
survey[["leftout"]] <- as.character(survey$P19A_10)


### placement of question
survey[["question_place"]] <- "after_party"


# subset: keep only the columns created above, in this order
survey_1623 <- survey[, c(
  # identifiers and geography
  "pid", "study", "year", "urban", "region", "hh",
  # inequality item and union membership
  "inequality", "inequality.variable", "union.self", "union.other",
  # employment, household and demographics
  "employed", "employed.self", "occupation", "occupation.self",
  "hhsize", "educ", "income", "age", "race",
  # politics, gender and religion
  "party", "ideology", "gender", "religion",
  # factual items and alienation index
  "factual1", "factual2", "factual3",
  "dontcare", "dontcount", "leftout",
  # question placement
  "question_place"
)]


# save file
#saveRDS(survey_1623, file = "Harris_Data/survey_1623.rds")

